Introduction

Load libraries

library(Seurat)
library(canceRbits)
library(dplyr)
library(patchwork)
library(DT)
library(SCpubr)
library(tibble)
library(dittoSeq)
library(scRepertoire)
library(reshape2)
library(viridis)
library(tidyr)

library(grDevices)
library(ggpubr)
library(ggplot2)
library(ggnewscale)

library(RColorBrewer)
library(scales)

library(enrichplot)
library(clusterProfiler)
library("org.Hs.eg.db")
library(DOSE)
library(msigdbr)
library(stringr)

Load single cell RNA-Seq data

# Load seurat object containing single cell pre-processed and annotated data, in the metadata folder

srat <- readRDS(params$path_to_data)
meta <- srat@meta.data
meta$WHO <- "SD"
meta$WHO[meta$patient %in% c("NeoBCC007_post", "NeoBCC008_post", "NeoBCC012_post", "NeoBCC017_post")] <- "CR"
meta$WHO[meta$patient %in% c("NeoBCC004_post", "NeoBCC006_post", "NeoBCC010_post", "NeoBCC011_post")] <- "PR"
srat <- AddMetaData(srat, meta$WHO, col.name = "WHO")
srat$WHO <- factor(srat$WHO, levels = c("CR", "PR", "SD"))

Define colors

colors_B <- c(
  "34" = "#ae017e",
  "7"  =  "#377eb8",
  "21" =  "#f781bf",  
  "38" = "#fed976",
  "15" = "#a6cee3" ,
  "22" = "#e31a1c"   ,
   "3" = "#9e9ac8",
 "4"   = "#fdb462" ,
 "24"  = "#b3de69"
)


colors_clonotype = c("Small (1e-04 < X <= 0.001)"   = "#8c6bb1",
           "Medium (0.001 < X <= 0.01)"   = "#41b6c4",
           "Large (0.01 < X <= 0.1)"      = "#fec44f",
           "Hyperexpanded (0.1 < X <= 1)" = "#ce1256"
)

colors_Ig = c("IGHA1" = "#addd8e",
           "IGHA2" = "#238443",
           "IGHM"  = "#dd3497" , 
           "IGHD"  =  "#41b6c4",
           "IGHG1" = "#fec44f",
           "IGHG2" = "#fe9929",
           "IGHG3" = "#ec7014",
           "IGHG4" = "#fee391"
)

Figure 5a

s   <- subset(srat, subset = anno_l1 %in% c("B cells","Plasma cells"))

s@meta.data$seurat_clusters <- factor(s@meta.data$seurat_clusters, levels=c("34","7", "21",  "38",
                                                                             "15", "22","3", "4", "24"))
    
p <- SCpubr::do_DimPlot(sample = s, 
                   reduction = "umap", 
                   label = TRUE, 
                   repel = TRUE, 
                   label.color = "black", 
                   legend.position = "none", 
                   group.by = "seurat_clusters", 
                   font.size = 25,
                   pt.size = 0.5, 
                   colors.use = colors_B) + theme_minimal()  + NoLegend() + theme(text = element_text(size=20))

p

DT::datatable(p$data, 
              caption = ("Figure 5a"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))

Figure 5b

Idents(s) <- s$seurat_clusters

genes <- list( "B" = c("MS4A1", "CD19"),
               "Ag presentation" = c("HLA-DRA", "HLA-DRB1", "HLA-DPB1", "CD40"),
               "Cytotox" = c("GZMK", "PRF1"),
               "Mem" = "CD27",
               "Plasma" = c("IGKC", "IGHG1", "CD38", "SDC1", "JCHAIN"),
               "Cell state" = c("G2M.Score", "S.Score","percent.mito")  
               )

p <- SCpubr::do_DotPlot(sample = s, 
                        features = genes, 
                        font.size = 18, 
                        legend.length = 12,  
                        legend.type = "colorbar", 
                        dot.scale = 8,  
                        sequential.palette ="PiYG",
                        scale = TRUE,
                        sequential.direction = -1
) 

p

DT::datatable(p$data, 
              caption = ("Figure 5b"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))

Figure 5c

q1 <- SCpubr::do_BarPlot( s,
                         group.by = "seurat_clusters",
                         split.by = "WHO",
                         position = "fill",
                         flip = TRUE,
                         order=FALSE,
                         legend.position = "none",
                         font.size = 32 ,
                         colors.use = colors_B,
                         xlab = "",
                         ylab = ""
                          
                   ) + xlab("Response") + ylab("% of B and plasma cells")



q1

DT::datatable(q1$data, 
              caption = ("Figure 5c"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))
q1 <- SCpubr::do_BarPlot( s,
                         group.by = "seurat_clusters",
                         split.by = "Pathological.Response",
                         position = "fill",
                         flip = TRUE,
                         order=FALSE,
                         legend.position = "none",
                         font.size = 32 ,
                         colors.use = colors_B,
                         xlab = "",
                         ylab = ""
                          
                   ) + xlab("Response") + ylab("% of B and plasma cells")



q1

DT::datatable(q1$data, 
              caption = ("Figure 5c"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))

Open and pre-process clonotyoe data

This is the code used to create the combined expression object from the BCR-Seq data. Due to personal confidentiality, the object cannot be provided to GEO, but raw data are available on request on EGA.

As default, the chunk related to BCR-Seq are not run.

# Run from raw BCR data stored in path_to_data or load the combined BCR data in the following chunk
# loading the data
vdj <- list()
dirs <- list.dirs(path = path_to_data, full.names = TRUE, recursive = FALSE)

samples <- c("NeoBCC007_post",
             "NeoBCC008_post",
             "NeoBCC012_post",
             "NeoBCC017_post",
             "NeoBCC004_post",
             "NeoBCC005_post",
             "NeoBCC006_post",
             "NeoBCC010_post",
             "NeoBCC011_post",
             "NeoBCC014_post",
             "NeoBCC015_post",
             "NeoBCC018_post")




for (i in (samples)){
  dir_TCR <- paste0(path_to_data, i, "_BCR_VDJ")
  vdj[[i]] <- read.csv(paste0(dir_TCR, "/filtered_contig_annotations.csv"))
}

head(vdj[[1]])

# combine the contigs
combined <- combineBCR(vdj, 
                samples = gsub("_pre|_post", "", samples), 
                removeNA = TRUE, removeMulti = TRUE)


merge_combined <- bind_rows(combined, .id = "sample")
if(params$accessibility == "unlock"){
  combined <- readRDS(file.path(params$path_to_BCR))
  
  srat <- RenameCells(srat,new.names = paste0(srat$patient, "_",gsub("(_1|_2|_3|_4|_5|_6|_7|_8|_9)*", "",colnames(srat))))
  srat <- RenameCells(srat,new.names = gsub("_post", "",colnames(srat)))

  srat$sample <- gsub("_post", "",srat$patient)


  seurat <- combineExpression(combined, srat, 
                  cloneCall = "strict", 
                  group.by = "sample", chain = "both",
                  proportion = TRUE,
                  filterNA = TRUE)



  s   <- subset(seurat, subset = anno_l1 %in%  c("B cells","Plasma cells") )


  s@meta.data$cloneType <- factor(s@meta.data$cloneType, levels = unique(s$cloneType))
} else{
  print("Please request access to the BCR-Seq data.")
}

Figure 5d

Done on the highly confident B and plasma cells with exactly 2 IGH and IGL chains

if(params$accessibility == "unlock"){

df <- s@meta.data

df <- df %>% 
    separate( CTgene, c("IGH", "IGL"), sep = "_") %>%
  separate( IGH, c("V", "D", "J", "C"), "\\.")
   
 
s <- AddMetaData(s, df$C, col.name = "Ig_level")
sss <- subset(s, subset = Ig_level != "NA")
dim(sss)

sss@meta.data$cloneType <- factor(sss@meta.data$cloneType, levels = c("Hyperexpanded (0.1 < X <= 1)", "Large (0.01 < X <= 0.1)", "Medium (0.001 < X <= 0.01)", "Small (1e-04 < X <= 0.001)"))

# define cell group membership
Idents(sss) <- sss$cloneType




sss@meta.data$Ig_level <- factor(sss@meta.data$Ig_level, levels = c("IGHA1",
         "IGHA2",
         "IGHM", 
         "IGHD",
         "IGHG1",
         "IGHG2",
         "IGHG3",
         "IGHG4"))


 

q1 <- SCpubr::do_BarPlot( sss,
                         group.by = "Ig_level",
                         split.by = "anno_l1",
                         position = "fill",
                         flip = TRUE,
                         order=FALSE,
                         legend.position = "right",
                         font.size = 30     , 
                         colors.use = colors_Ig) +
  xlab("") +
  ylab("Frequency of Ig isotypes")



q1
DT::datatable(q1$data, 
              caption = ("Figure 5d"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))




q2 <- SCpubr::do_BarPlot( sss,
                         group.by = "Ig_level",
                         split.by = "cloneType",
                         position = "fill",
                         flip = TRUE,
                         order=FALSE,
                         legend.position = "right",
                         font.size = 30     , 
                         colors.use = colors_Ig               ) +
  xlab("") +
  ylab("Frequency of Ig isotypes")



q2

p <- q1|q2
DT::datatable(q2$data, 
              caption = ("Figure 5d"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))

} else{
  p <- "Please request access to the BCR-Seq data."
}
print(p)

Figure 5f

if(params$accessibility == "unlock"){

Idents(s) <- s$seurat_clusters
s@meta.data$seurat_clusters <- factor(s@meta.data$seurat_clusters, levels=c("34","7", "21",  "38",
                                                                             "15", "22","3", "4", "24"))

s@meta.data$cloneType <- factor(s@meta.data$cloneType, levels = c("Hyperexpanded (0.1 < X <= 1)", "Large (0.01 < X <= 0.1)", "Medium (0.001 < X <= 0.01)", "Small (1e-04 < X <= 0.001)"))

# define cell group membership
Idents(s) <- s$cloneType

de_markers <- DElegate::FindAllMarkers2(s, replicate_column = "patient", method = "edger", min_fc = 0, min_rate = 0.1)

signature_h <- de_markers$feature[de_markers$group1=="Hyperexpanded (0.1 < X <= 1)" ]
background <- rownames(s)

# enricher for cnet plot
GO_H_gene_sets = msigdbr(species = "human", category = "H")
msigdbr_t2g = GO_H_gene_sets %>% dplyr::distinct(gs_name, gene_symbol) %>% as.data.frame()

ego_h <- enricher(gene = signature_h, universe = background, TERM2GENE = msigdbr_t2g)

 
b <- barplot(ego_h, title = "Hallmarks´enrichment of hyperexpanded clones")


tmp <- b$data

tmp$ID <- str_trunc(tmp$ID, 100, "right")
tmp$ID <- factor(tmp$ID, levels = tmp$ID[order(tmp$Count)])

p <- ggplot(tmp, aes(Count, ID)) +
    geom_bar(stat = "identity", color="black", fill =  colors_clonotype["Hyperexpanded (0.1 < X <= 1)"]) +
   theme_classic()+ 
    geom_text(
        aes(label = (paste( "padj=", format(p.adjust,scientific = TRUE, digits = 2)))),
        color = "black",
        size = 6,
        hjust=1,
        position = position_dodge(0.5)  ) +
     theme(text = element_text(size=18)) +
  ggtitle("Hyperexpanded clones")


p

DT::datatable(p$data, 
              caption = ("Figure 5f"),
              extensions = 'Buttons', 
              options = list( dom = 'Bfrtip',
              buttons = c( 'csv', 'excel')))
} else{
 p <- "Please request access to the BCR-Seq data."
}
return(p)

Session Info

sessionInfo()
## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Vienna
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] stringr_1.5.1         msigdbr_7.5.1         DOSE_3.26.2           org.Hs.eg.db_3.17.0  
##  [5] AnnotationDbi_1.62.2  IRanges_2.34.1        S4Vectors_0.38.2      Biobase_2.60.0       
##  [9] BiocGenerics_0.46.0   clusterProfiler_4.8.3 enrichplot_1.20.3     scales_1.3.0         
## [13] RColorBrewer_1.1-3    ggnewscale_0.4.10     tidyr_1.3.1           scRepertoire_1.10.1  
## [17] dittoSeq_1.12.2       canceRbits_0.1.6      ggpubr_0.6.0.999      ggplot2_3.5.1        
## [21] viridis_0.6.5         viridisLite_0.4.2     reshape2_1.4.4        tibble_3.2.1         
## [25] SCpubr_2.0.2          DT_0.32               patchwork_1.2.0       dplyr_1.1.4          
## [29] Seurat_5.0.3          SeuratObject_5.0.1    sp_2.1-3             
## 
## loaded via a namespace (and not attached):
##   [1] fs_1.6.4                    matrixStats_1.2.0           spatstat.sparse_3.0-3      
##   [4] bitops_1.0-7                HDO.db_0.99.1               httr_1.4.7                 
##   [7] doParallel_1.0.17           tools_4.3.0                 sctransform_0.4.1          
##  [10] backports_1.4.1             utf8_1.2.4                  R6_2.5.1                   
##  [13] vegan_2.6-4                 lazyeval_0.2.2              uwot_0.1.16                
##  [16] mgcv_1.9-1                  permute_0.9-7               withr_3.0.0                
##  [19] gridExtra_2.3               progressr_0.14.0            cli_3.6.2                  
##  [22] spatstat.explore_3.2-7      fastDummies_1.7.3           scatterpie_0.2.1           
##  [25] isoband_0.2.7               labeling_0.4.3              sass_0.4.9                 
##  [28] spatstat.data_3.0-4         ggridges_0.5.6              pbapply_1.7-2              
##  [31] yulab.utils_0.1.4           gson_0.1.0                  stringdist_0.9.12          
##  [34] parallelly_1.37.1           limma_3.56.2                RSQLite_2.3.5              
##  [37] VGAM_1.1-10                 rstudioapi_0.16.0           generics_0.1.3             
##  [40] gridGraphics_0.5-1          ica_1.0-3                   spatstat.random_3.2-3      
##  [43] crosstalk_1.2.1             car_3.1-2                   GO.db_3.17.0               
##  [46] Matrix_1.6-5                ggbeeswarm_0.7.2            fansi_1.0.6                
##  [49] abind_1.4-5                 lifecycle_1.0.4             edgeR_3.42.4               
##  [52] yaml_2.3.8                  carData_3.0-5               SummarizedExperiment_1.30.2
##  [55] qvalue_2.32.0               Rtsne_0.17                  blob_1.2.4                 
##  [58] grid_4.3.0                  promises_1.2.1              crayon_1.5.2               
##  [61] miniUI_0.1.1.1              lattice_0.22-6              cowplot_1.1.3              
##  [64] KEGGREST_1.40.1             pillar_1.9.0                knitr_1.45                 
##  [67] fgsea_1.26.0                GenomicRanges_1.52.1        future.apply_1.11.1        
##  [70] codetools_0.2-19            fastmatch_1.1-4             leiden_0.4.3.1             
##  [73] glue_1.7.0                  downloader_0.4              ggfun_0.1.5                
##  [76] data.table_1.15.2           treeio_1.24.3               vctrs_0.6.5                
##  [79] png_0.1-8                   spam_2.10-0                 gtable_0.3.5               
##  [82] assertthat_0.2.1            cachem_1.1.0                xfun_0.43                  
##  [85] S4Arrays_1.0.6              mime_0.12                   tidygraph_1.3.1            
##  [88] survival_3.5-8              DElegate_1.2.1              SingleCellExperiment_1.22.0
##  [91] pheatmap_1.0.12             iterators_1.0.14            fitdistrplus_1.1-11        
##  [94] ROCR_1.0-11                 nlme_3.1-164                ggtree_3.13.0.001          
##  [97] bit64_4.0.5                 RcppAnnoy_0.0.22            evd_2.3-6.1                
## [100] GenomeInfoDb_1.36.4         bslib_0.6.2                 irlba_2.3.5.1              
## [103] vipor_0.4.7                 KernSmooth_2.23-22          DBI_1.2.2                  
## [106] colorspace_2.1-0            ggrastr_1.0.2               tidyselect_1.2.1           
## [109] bit_4.0.5                   compiler_4.3.0              SparseM_1.81               
## [112] DelayedArray_0.26.7         plotly_4.10.4               shadowtext_0.1.3           
## [115] lmtest_0.9-40               digest_0.6.35               goftest_1.2-3              
## [118] spatstat.utils_3.0-4        rmarkdown_2.26              XVector_0.40.0             
## [121] htmltools_0.5.8             pkgconfig_2.0.3             sparseMatrixStats_1.12.2   
## [124] MatrixGenerics_1.12.3       highr_0.10                  fastmap_1.2.0              
## [127] rlang_1.1.4                 htmlwidgets_1.6.4           shiny_1.8.1                
## [130] farver_2.1.2                jquerylib_0.1.4             zoo_1.8-12                 
## [133] jsonlite_1.8.8              BiocParallel_1.34.2         GOSemSim_2.26.1            
## [136] RCurl_1.98-1.14             magrittr_2.0.3              GenomeInfoDbData_1.2.10    
## [139] ggplotify_0.1.2             dotCall64_1.1-1             munsell_0.5.1              
## [142] Rcpp_1.0.12                 evmix_2.12                  babelgene_22.9             
## [145] ape_5.8                     reticulate_1.35.0           truncdist_1.0-2            
## [148] stringi_1.8.4               ggalluvial_0.12.5           ggraph_2.2.1               
## [151] zlibbioc_1.46.0             MASS_7.3-60.0.1             plyr_1.8.9                 
## [154] parallel_4.3.0              listenv_0.9.1               ggrepel_0.9.5              
## [157] forcats_1.0.0               deldir_2.0-4                Biostrings_2.68.1          
## [160] graphlayouts_1.1.1          splines_4.3.0               tensor_1.5                 
## [163] locfit_1.5-9.9              igraph_2.0.3                spatstat.geom_3.2-9        
## [166] cubature_2.1.0              ggsignif_0.6.4              RcppHNSW_0.6.0             
## [169] evaluate_0.23               foreach_1.5.2               tweenr_2.0.3               
## [172] httpuv_1.6.15               RANN_2.6.1                  purrr_1.0.2                
## [175] polyclip_1.10-6             future_1.33.2               scattermore_1.2            
## [178] ggforce_0.4.2               broom_1.0.5                 xtable_1.8-4               
## [181] tidytree_0.4.6              RSpectra_0.16-1             rstatix_0.7.2              
## [184] later_1.3.2                 gsl_2.1-8                   aplot_0.2.3                
## [187] beeswarm_0.4.0              memoise_2.0.1               cluster_2.1.6              
## [190] powerTCR_1.20.0             globals_0.16.3